# AN EXTRAVERSION AFFAIR: AFRICAN STATES AND LARGESCALE LAND ACQUISITION SUBNATIONAL LOCATIONS
# Version of the script: 12/04/2022
# C�cile Richetta, University of Geneva, Department of Political Science and International Relations 

# Analysis in main text, tables in Appendix A and Appendix B
  # I. APPENDIX A: DESCRIPTIVE STATISTICS
  # II. APPENDIX B: MAIN MODELS (MODEL 1A, 1B AND 2 IN THE MAIN BODY OF THE ARTICLE)

# packages for weight matrix
library(maptools)
library(fields)
library(spdep)
library(data.table)
library(Matrix)
library(base)

# packages for analysis 
library(stargazer)
library(vtable)
library(car)
library(foreign)
library(plyr)
library(dplyr)
library(fastDummies)
library(splm)
library(spmle)
library(plm)
library(Hmisc)
spdep::set.ZeroPolicyOption(TRUE)
spdep::set.ZeroPolicyOption(TRUE)

options(scipen=999)
options(digits=7)

# data on land deals 
setwd("")
load("data.RData")

describe()

# open weight matrix
W <- readMM(file = "W.mtx") # matrix
W <- as(W, "dgCMatrix") # to dgc format
W_lw <- mat2listw(W, style="W")
  
  # temporal block diagonal matrix
  W08 <- W
  W09 <- W
  W10 <- W
  W11 <- W
  W12 <- W
  W13 <- W
  W14 <- W
  W15 <- W
  W16 <- W
  W17 <- W
  W18 <- W
  
  WT <- bdiag(list(W08, W09, W10, W11, W12, W13, W14, W15, W16, W17, W18))
  dimnames(WT) <- Map(c, dimnames(W08), dimnames(W09), dimnames(W10)
                      , dimnames(W11), dimnames(W12), dimnames(W13), dimnames(W14), dimnames(W15),
                      dimnames(W16), dimnames(W17), dimnames(W18))
  denom <- ifelse(rowSums(WT)==0, 1, rowSums(WT))
  WT <- Matrix(WT/denom, sparse = TRUE) # okay works and looks like FHC replication 
  WT_lw <- mat2listw(WT, style = "W")

# -------------------------------- APPENDIX A. DESCRIPTIVE STATISTICS -------------------------------- #
  # Table 1. Number of deals with geoprecision by country : non-replicable with dataset for analysis
  # Table 2. Descriptive statistics
  # mean, standard deviation, minimum and maximum
  st(data[, c(6:21)], out="csv", file="AppendixATable2.csv")
  # moran'is I
  moran.test(data$nllabi, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$nle_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$pop_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$polvio_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$urban_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$rainsum_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$water_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$crop_l1, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$nle_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$pop_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$polvio_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$urban_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$rainsum_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$water_l2, listw = WT_lw) # moran's I, p < 0.000
  moran.test(data$crop_l2, listw = WT_lw) # moran's I, p < 0.000
  
  # Table 3. OLS regression results to investigate spatial and temporal dependence
  lm.results <- lm(nllabi~nle_l1+pop_l1+urban_l1+crop_l1+rainsum_l1+water_l1+polvio_l1+ 
                       gridarea + year_2009 + year_2010 + year_2011 + year_2012 + year_2013 +
                       year_2014 + year_2015 + year_2016 + year_2017 + year_2018 + ctyn_3 + ctyn_5 + ctyn_6 + ctyn_7 + ctyn_8 + ctyn_9 + ctyn_10 + ctyn_11 + 
                       ctyn_12 + ctyn_13 + ctyn_15 + ctyn_16 + ctyn_17 + ctyn_18 + ctyn_19 + ctyn_20 + ctyn_21 +
                       ctyn_23 + ctyn_24 + ctyn_25 + ctyn_26 + ctyn_27 + ctyn_28 + ctyn_29 + ctyn_30 + ctyn_31 +
                       ctyn_32 + ctyn_33 + ctyn_34 + ctyn_35 + ctyn_36, data = data)
  summary(lm.results)
  stargazer(lm.results, type="text", out="lm.txt")
  # Variance Inflation Factor
  vif(lm.results)
  # Moran's I test on the residuals of the OLS regression 
  lm.morantest(lm.results, listw = WT_lw) # p value 0.000 so spatial clustering in the residuals
  # Test for spatial interdependence 
  lm.result <- lm.LMtests(lm.results, WT_lw, test="all")
  t.lm.result <-t(sapply(lm.result, function(x) c(x$statistic, x$parameter, x$p.value)))
  colnames(t.lm.result) <-c("Statistic", "df", "p-value")
  printCoefmat((t.lm.result))  


# ------------------------------------- APPENDIX B. MAIN RESULTS --------------------------------------- #
  # Table 1. Results SPMLE analysis, with t-1 lag. Dependent variable: LSLA OCCURENCE.  
    # Model 1A: Random effects 
    datal1a <- data[, c(7:13, 21)]
    X1 <- cbind(1, as.matrix(datal1a))
    y <- data$nllabi
    ## Spatio-temporal probitusing SPMLE and Python speed-up
    ## Set data parameters
    N <- nrow(X1)/11
    TT <- 11
    has_temporal_lag <- TRUE
    has_spatial_lag <- TRUE
    zero_period <- TRUE
    ## Make the model object
    spmle1A <- STModel$new(y = y, X = X1,
                        N = N, TT = TT,
                        W_t = W,
                        has_temporal_lag = has_temporal_lag,
                        has_spatial_lag = has_spatial_lag,
                        zero_period = zero_period,
                        use_py = TRUE)  # use python speed-up!
    Model1A <- spmle1A$autotrain()
    ## Summary
    summary(Model1A)
    saveRDS(Model1A, "Model1A.rds")
    
    # Model 1B: Country and year fixed effects
    datal1b <- data[, c(7:13, 21, 27:57, 59:68)]
    X2 <- cbind(1, as.matrix(datal1b))
    ## Make the model object
    spmle1b <- STModel$new(y = y, X = X2,
                          N = N, TT = TT,
                          W_t = W,
                          has_temporal_lag = has_temporal_lag,
                          has_spatial_lag = has_spatial_lag,
                          zero_period = zero_period,
                          use_py = TRUE)  # use python speed-up!
    ## ML fit
    Model1B <- spmle1b$autotrain()
    ## Summary
    summary(Model1B)
    saveRDS(Model1B, "Model1B.rds")

  # Table 2. Results SAREM(2)SRRE analysis, with t-1 lag. Dependent variable: LSLA OCCURENCE. 
    fm <- nllabi ~ nllog_l1*polvio_l1 + nllog_l1*poplog_l1 + nllog_l1*water_l1 + nllog_l1*rainsum_l1 + nllog_l1*urban_l1 + 
     gridarea + year2009 + year2010 + year2011 + year2012 + year2013 + year2014 + year2015 + year2016 +
     year2017 + year2018 + ctyn_1 + ctyn_3 + ctyn_5 + ctyn_6 + ctyn_7 + ctyn_8 + ctyn_9 + ctyn_10 + ctyn_11 + 
     ctyn_12 + ctyn_13 + ctyn_15 + ctyn_16 + ctyn_17 + ctyn_18 + ctyn_19 + ctyn_20 + ctyn_21 +
     ctyn_23 + ctyn_24 + ctyn_25 + ctyn_26 + ctyn_27 + ctyn_28 + ctyn_29 + ctyn_30 + ctyn_31 +
     ctyn_32 + ctyn_33 + ctyn_34 + ctyn_35
    Model2 <- spreml(fm, data = data, w=W_lw,
                     error="sem2srre", Hess = FALSE, lag=T, zero.policy=TRUE)
    summary(Model2)
    ## calculate impact measures
    impac1 <- impacts(Model2, listw = mat2listw(W, style = "W"), time = 11)
    summary(impac1, zstats=TRUE, short=TRUE)
    saveRDS(model2, "Model2.rds")
  
  rm(list= ls())
  # ---------------------------------------- END SCRIPT ------------------------------------------#
  